library(tidyverse)
library(p8105.datasets)
library(plotly)
data("ny_noaa")

# Clean the raw NOAA table:
#   * rescale prcp, tmax and tmin by a factor of 10 (tmax/tmin are coerced to
#     numeric first) -- presumably converting tenths to whole units; confirm
#     against the ny_noaa dataset documentation
#   * split the date into separate year / month / day columns
#   * store month as a number so it can be used as a join key
weather_df =
  ny_noaa %>%
  mutate(
    date = as.character(date),
    prcp = prcp / 10,
    tmax = as.numeric(tmax) / 10,
    tmin = as.numeric(tmin) / 10
  ) %>%
  separate(date, into = c("year", "month", "day")) %>%
  mutate(month = as.numeric(month))

# Lookup table pairing each month number (1-12) with its English name,
# taken from the built-in `month.name` constant.
month_df = tibble(month = seq_len(12), month_name = month.name)

# Attach the month name to every observation by matching on the month number,
# then drop the numeric month column so only `month_name` remains.
weather_df =
  weather_df %>%
  left_join(month_df, by = "month") %>%
  select(-month)
# Scatter plot of mean tmax vs. mean tmin, one point per station (`id`) and
# year, coloured by year and labelled on hover with the station id.
weather_df %>%
  group_by(id, year) %>%
  summarise(
    # na.rm = TRUE: without it a single missing daily reading turns the whole
    # station-year mean into NA and the point disappears from the plot.
    mean_tmax = mean(tmax, na.rm = TRUE),
    mean_tmin = mean(tmin, na.rm = TRUE),
    # Return an ungrouped tibble; nothing downstream needs the `id` grouping,
    # and this also silences the summarise() regrouping message.
    .groups = "drop"
  ) %>%
  # A station-year with no readings at all yields NaN from mean(); remove
  # those rows explicitly instead of leaving plotly to discard them.
  filter(!is.na(mean_tmax), !is.na(mean_tmin)) %>%
  plot_ly(
    x = ~mean_tmax, y = ~mean_tmin, color = ~year,
    text = ~id, alpha = .5, type = "scatter",
    mode = "markers", colors = "viridis")
## NOTE: the console output previously pasted here (summarise() regrouping
## message and plotly "Ignoring ... observations" warning) described the old
## NA-propagating version of this chunk; re-run to capture current output.
# Box plot of tmax by month. `month_name` is turned into a factor whose levels
# follow calendar order (the built-in `month.name` vector) so the boxes are
# not sorted alphabetically.
weather_df %>%
  mutate(month_name = fct_relevel(month_name, month.name)) %>%
  plot_ly(
    x = ~month_name,
    y = ~tmax,
    color = ~month_name,
    colors = "viridis",
    type = "box"
  )
## Warning: Ignoring 1134358 observations
# Bar chart of the number of rows in `weather_df` per year; `count()` stores
# the tally in column `n`.
plot_ly(
  data = count(weather_df, year),
  x = ~year,
  y = ~n,
  color = ~year,
  colors = "viridis",
  type = "bar"
)